; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=wasm32 -mattr=+simd128 -opaque-pointers | FileCheck %s

; Codegen test for a chain of narrow (2-element) vector operations on wasm32
; with simd128 enabled. The IR below:
;   1. loads a <2 x i32> from %0,
;   2. compares it (unsigned greater-than) against the constant <0, 1>,
;   3. zero-extends the <2 x i1> result to <2 x i8>,
;   4. arithmetic-shifts the constant <16, 16> right by that 0/1 amount,
;   5. multiplies the shifted vector by itself, and
;   6. stores the resulting <2 x i8> to %pr.
;
; The expected output performs the compare as a single i32x4.gt_u on a
; 64-bit zero-extending vector load, then handles each of the two lanes with
; scalar i32 code (extract_lane / and 1 / shr_u / mul) and re-inserts the
; results with i8x16.replace_lane before a single 16-bit lane store.
;
; NOTE(review): this looks like a reduced regression test for lowering of
; sub-128-bit vectors in the WebAssembly backend; the original bug report is
; not referenced in this file -- confirm against the commit history.
;
; The assertion lines inside the function are autogenerated (see the note at
; the top of the file); regenerate them with the update script rather than
; editing them by hand.
define void @f(ptr %0, ptr %pr) {
; CHECK-LABEL: f:
; CHECK:         .functype f (i32, i32) -> ()
; CHECK-NEXT:    .local v128
; CHECK-NEXT:  # %bb.0: # %BB
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    v128.const 0, 1, 0, 0
; CHECK-NEXT:    i32x4.gt_u
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i32x4.extract_lane 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.shr_u
; CHECK-NEXT:    local.tee 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.mul
; CHECK-NEXT:    i8x16.replace_lane 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32x4.extract_lane 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.shr_u
; CHECK-NEXT:    local.tee 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.mul
; CHECK-NEXT:    i8x16.replace_lane 1
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
BB:
  ; Two 32-bit inputs read from the first pointer argument.
  %v0 = load <2 x i32>, ptr %0
  ; Per-lane unsigned test: lane 0 > 0, lane 1 > 1.
  %v1 = icmp ugt <2 x i32> %v0, <i32 0, i32 1>
  ; Widen each i1 to an i8 holding 0 or 1; this is the shift amount below.
  %v2 = zext <2 x i1> %v1 to <2 x i8>
  ; Each lane becomes 16 (shift by 0) or 8 (shift by 1).
  %v3 = ashr <2 x i8> <i8 16, i8 16>, %v2
  ; Square each lane; both operands are the same value.
  %v4 = mul <2 x i8> %v3, %v3
  ; Write the two result bytes through the second pointer argument.
  store <2 x i8> %v4, ptr %pr
  ret void
}

